# -*- coding: utf-8 -*-
import scrapy
from AQI.items import AqiItem
from scrapy.spiders import CrawlSpider, Rule
from scrapy.linkextractors import LinkExtractor
class AqiSpider(CrawlSpider):
    """Crawl https://www.aqistudy.cn/historydata/ for historical AQI data.

    Follows each city's monthly-data link, then each month's daily-data
    link, and parses one item per day row from the daily table.
    """
    name = 'aqi_crawl'
    allowed_domains = ['aqistudy.cn']

    start_urls = ['https://www.aqistudy.cn/historydata/']

    rules = (
        # Follow every city's monthly-data link (no callback: crawl deeper only).
        Rule(LinkExtractor(allow='monthdata')),
        # Follow every month's daily-data link and parse the day table.
        Rule(LinkExtractor(allow='daydata'), callback='parse_day'),
    )

    def parse_day(self, response):
        """Parse one daily-data page.

        Yields one AqiItem per table row, carrying the city name, the
        date (first column) and the air-quality level (third column).
        """
        # The page title embeds the city name; strip the fixed-width
        # prefix/suffix around it.  Guard against a missing title
        # (e.g. an anti-crawler page) instead of crashing on None[8:-11].
        title = response.xpath('//*[@id="title"]/text()').extract_first()
        city_name = title[8:-11] if title else None

        # Skip the header row with a slice; the original popped from the
        # SelectorList, mutating it needlessly.
        for tr in response.xpath('//tr')[1:]:
            # Create a fresh item per row: reusing one mutable item across
            # yields lets asynchronous pipelines see later rows' values
            # under earlier rows' references.
            item = AqiItem()
            item['city_name'] = city_name
            # Date (first <td>)
            item['date'] = tr.xpath('td[1]/text()').extract_first()
            # Quality level (third <td>, text may be nested in a child tag)
            item['level'] = tr.xpath('td[3]//text()').extract_first()
            yield item









